import plotly
# Put plotly into offline notebook mode before any figure is created or shown.
plotly.offline.init_notebook_mode()
from sklearn.datasets import load_iris
import pandas as pd
# Load the scikit-learn iris bunch and wrap its measurement matrix in a
# DataFrame whose columns carry the bunch's feature names.
data_iris = load_iris()
df_iris = pd.DataFrame(
    data=data_iris.data,
    columns=data_iris.feature_names,
)
# Bare expression: displays the frame when run as a notebook cell.
df_iris
| | sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) |
|---|---|---|---|---|
| 0 | 5.1 | 3.5 | 1.4 | 0.2 |
| 1 | 4.9 | 3.0 | 1.4 | 0.2 |
| 2 | 4.7 | 3.2 | 1.3 | 0.2 |
| 3 | 4.6 | 3.1 | 1.5 | 0.2 |
| 4 | 5.0 | 3.6 | 1.4 | 0.2 |
| ... | ... | ... | ... | ... |
| 145 | 6.7 | 3.0 | 5.2 | 2.3 |
| 146 | 6.3 | 2.5 | 5.0 | 1.9 |
| 147 | 6.5 | 3.0 | 5.2 | 2.0 |
| 148 | 6.2 | 3.4 | 5.4 | 2.3 |
| 149 | 5.9 | 3.0 | 5.1 | 1.8 |
150 rows × 4 columns
# Translate each row's integer target into its species name, then store a
# boolean target column: True where the species is 'virginica'.
species_per_row = data_iris.target_names[data_iris.target]
df_iris['y'] = species_per_row == 'virginica'
df_iris.head()
| | sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) | y |
|---|---|---|---|---|---|
| 0 | 5.1 | 3.5 | 1.4 | 0.2 | False |
| 1 | 4.9 | 3.0 | 1.4 | 0.2 | False |
| 2 | 4.7 | 3.2 | 1.3 | 0.2 | False |
| 3 | 4.6 | 3.1 | 1.5 | 0.2 | False |
| 4 | 5.0 | 3.6 | 1.4 | 0.2 | False |
# Split the frame on the boolean target column. 'y' is already a boolean
# mask, so it is used directly (and negated with ~) instead of being
# compared against True / False.
is_virginica = df_iris['y']
virginica = df_iris[is_virginica]
not_virginica = df_iris[~is_virginica]
# Summary statistics for the rows where y is True.
virginica.describe()
| | sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) |
|---|---|---|---|---|
| count | 50.00000 | 50.000000 | 50.000000 | 50.00000 |
| mean | 6.58800 | 2.974000 | 5.552000 | 2.02600 |
| std | 0.63588 | 0.322497 | 0.551895 | 0.27465 |
| min | 4.90000 | 2.200000 | 4.500000 | 1.40000 |
| 25% | 6.22500 | 2.800000 | 5.100000 | 1.80000 |
| 50% | 6.50000 | 3.000000 | 5.550000 | 2.00000 |
| 75% | 6.90000 | 3.175000 | 5.875000 | 2.30000 |
| max | 7.90000 | 3.800000 | 6.900000 | 2.50000 |
# Summary statistics for the rows where y is False, for comparison with the
# virginica summary.
not_virginica.describe()
| | sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) |
|---|---|---|---|---|
| count | 100.000000 | 100.000000 | 100.000000 | 100.000000 |
| mean | 5.471000 | 3.099000 | 2.861000 | 0.786000 |
| std | 0.641698 | 0.478739 | 1.449549 | 0.565153 |
| min | 4.300000 | 2.000000 | 1.000000 | 0.100000 |
| 25% | 5.000000 | 2.800000 | 1.500000 | 0.200000 |
| 50% | 5.400000 | 3.050000 | 2.450000 | 0.800000 |
| 75% | 5.900000 | 3.400000 | 4.325000 | 1.300000 |
| max | 7.000000 | 4.400000 | 5.100000 | 1.800000 |
import seaborn as sns
import matplotlib.pyplot as plt
# Draw one histogram per measurement column, split by the boolean target.
#
# The hue is passed as a labelled Series so seaborn builds the legend itself.
# Calling plt.legend([...]) afterwards would replace seaborn's legend and pair
# the labels with artists in draw order, which need not match the hue levels,
# so the labels could end up attached to the wrong colour.
species_labels = ['Not Virginica', 'Virginica']
species_hue = (
    df_iris['y']
    .map({False: species_labels[0], True: species_labels[1]})
    .rename('Species')
)
for col in df_iris.columns[0:4]:
    sns.histplot(
        data=df_iris,
        x=col,
        hue=species_hue,
        hue_order=species_labels,  # keep the False -> True colour order
        bins=10,
        palette='viridis',
    )
    # Show inside the loop so every feature gets its own figure.
    plt.show()
d:\AI & ML\ML FOUNDATION\LABS\CSCN8010\venv\CSCN8010_classic_ml\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. d:\AI & ML\ML FOUNDATION\LABS\CSCN8010\venv\CSCN8010_classic_ml\Lib\site-packages\seaborn\_oldcore.py:1075: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning. d:\AI & ML\ML FOUNDATION\LABS\CSCN8010\venv\CSCN8010_classic_ml\Lib\site-packages\seaborn\_oldcore.py:1075: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
d:\AI & ML\ML FOUNDATION\LABS\CSCN8010\venv\CSCN8010_classic_ml\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. d:\AI & ML\ML FOUNDATION\LABS\CSCN8010\venv\CSCN8010_classic_ml\Lib\site-packages\seaborn\_oldcore.py:1075: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning. d:\AI & ML\ML FOUNDATION\LABS\CSCN8010\venv\CSCN8010_classic_ml\Lib\site-packages\seaborn\_oldcore.py:1075: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
d:\AI & ML\ML FOUNDATION\LABS\CSCN8010\venv\CSCN8010_classic_ml\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. d:\AI & ML\ML FOUNDATION\LABS\CSCN8010\venv\CSCN8010_classic_ml\Lib\site-packages\seaborn\_oldcore.py:1075: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning. d:\AI & ML\ML FOUNDATION\LABS\CSCN8010\venv\CSCN8010_classic_ml\Lib\site-packages\seaborn\_oldcore.py:1075: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
d:\AI & ML\ML FOUNDATION\LABS\CSCN8010\venv\CSCN8010_classic_ml\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. d:\AI & ML\ML FOUNDATION\LABS\CSCN8010\venv\CSCN8010_classic_ml\Lib\site-packages\seaborn\_oldcore.py:1075: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning. d:\AI & ML\ML FOUNDATION\LABS\CSCN8010\venv\CSCN8010_classic_ml\Lib\site-packages\seaborn\_oldcore.py:1075: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
# Pairwise correlation of the first four columns (the measurements); the
# boolean target column 'y' that follows them is left out.
df_iris_corr = df_iris.iloc[:, 0:4].corr()
print(df_iris_corr)

# Render the matrix as an annotated heatmap on a large canvas.
plt.figure(figsize=(12, 10))
sns.heatmap(df_iris_corr, annot=True)
plt.title('Heatmap of Correlation Matrix of Iris Dataset')
plt.show()
sepal length (cm) sepal width (cm) petal length (cm) \
sepal length (cm) 1.000000 -0.117570 0.871754
sepal width (cm) -0.117570 1.000000 -0.428440
petal length (cm) 0.871754 -0.428440 1.000000
petal width (cm) 0.817941 -0.366126 0.962865
petal width (cm)
sepal length (cm) 0.817941
sepal width (cm) -0.366126
petal length (cm) 0.962865
petal width (cm) 1.000000
import plotly.express as px
# Colour sequence shared by the plotly figures in this notebook.
custom_colors = ['#1f77b4', 'skyblue', 'cornflowerblue']

# Count rows per target value. value_counts() orders its result by frequency,
# so the slice labels are derived from the result's own index rather than
# from a hard-coded list that only matches while False is the larger class.
class_counts = df_iris['y'].value_counts()
class_labels = {False: 'Not Virginica', True: 'Virginica'}

# Create a pie chart with custom colors
fig_pie = px.pie(
    values=class_counts,
    names=[class_labels[flag] for flag in class_counts.index],
    title='Pie Chart of Species Counts',
    color_discrete_sequence=custom_colors,
)
# Show the pie chart
fig_pie.show()
Observation :
# Box plot of sepal length with one box per value of the boolean target 'y',
# coloured by that same column using the shared colour sequence.
fig_boxplot = px.box(
    df_iris,
    x="y",
    y="sepal length (cm)",
    color='y',
    color_discrete_sequence=custom_colors,
    title="Box Plot of Sepal Length by Species",
)

# Axis titles; the raw True/False tick labels on the x axis are hidden.
fig_boxplot.update_layout(
    xaxis_title="Species",
    yaxis_title="Sepal Length",
    xaxis=dict(showticklabels=False),
)

# Replace the raw "True"/"False" trace names with readable legend entries.
for raw_name, legend_name in (("True", "Virginica"), ("False", "Non Virginica")):
    fig_boxplot.update_traces(name=legend_name, selector=dict(name=raw_name))

# Show the box plot
fig_boxplot.show()
d:\AI & ML\ML FOUNDATION\LABS\CSCN8010\venv\CSCN8010_classic_ml\Lib\site-packages\plotly\express\_core.py:1985: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
from sklearn.model_selection import train_test_split
# First split: keep 80% of the rows for training and hold out 20%
# (X_old / y_old). Features are the first four columns, the target is 'y'.
X_train, X_old, y_train, y_old = train_test_split(
    df_iris.iloc[:, 0:4],
    df_iris['y'],
    test_size=0.2,
    random_state=42,
)
# Second split: halve the held-out rows into validation and test sets.
X_val, X_test, y_val, y_test = train_test_split(
    X_old,
    y_old,
    test_size=0.5,
    random_state=42,
)